1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)

op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

1.2 The Data

DARWIN <- read.csv("~/GitHub/FCA/Data/DARWIN/DARWIN.csv")
rownames(DARWIN) <- DARWIN$ID
DARWIN$ID <- NULL
DARWIN$class <- 1*(DARWIN$class=="P")
print(table(DARWIN$class))
#> 
#>  0  1 
#> 85 89

DARWIN[,1:ncol(DARWIN)] <- sapply(DARWIN,as.numeric)

signedlog <- function(x) { return (sign(x)*log(abs(1.0e12*x)+1.0))}
whof <- !(colnames(DARWIN) %in% c("class"));
DARWIN[,whof] <- signedlog(DARWIN[,whof])

1.2.0.1 Standarize the names for the reporting

studyName <- "DARWIN"
dataframe <- DARWIN
outcome <- "class"

TopVariables <- 10

thro <- 0.80
cexheat = 0.15

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
174 450
pander::pander(table(dataframe[,outcome]))
0 1
85 89

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9994118

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 450 , Uni p: 0.006594985 , Uncorrelated Base: 139 , Outcome-Driven Size: 0 , Base Size: 139 
#> 
#> 
 1 <R=0.999,r=0.975,N=  147>, Top: 47( 1 )[ 1 : 47 Fa= 46 : 0.975 ]( 46 , 72 , 0 ),<|>Tot Used: 118 , Added: 72 , Zero Std: 0 , Max Cor: 0.999
#> 
 2 <R=0.999,r=0.975,N=  147>, Top: 7( 1 )[ 1 : 7 Fa= 53 : 0.975 ]( 7 , 17 , 46 ),<|>Tot Used: 140 , Added: 17 , Zero Std: 0 , Max Cor: 0.996
#> 
 3 <R=0.996,r=0.973,N=  147>, Top: 10( 1 )[ 1 : 10 Fa= 60 : 0.973 ]( 10 , 10 , 53 ),<|>Tot Used: 155 , Added: 10 , Zero Std: 0 , Max Cor: 0.973
#> 
 4 <R=0.973,r=0.936,N=   91>, Top: 41( 3 )[ 1 : 41 Fa= 92 : 0.936 ]( 40 , 49 , 60 ),<|>Tot Used: 226 , Added: 49 , Zero Std: 0 , Max Cor: 0.956
#> 
 5 <R=0.956,r=0.928,N=   91>, Top: 10( 1 )[ 1 : 10 Fa= 98 : 0.928 ]( 10 , 10 , 92 ),<|>Tot Used: 239 , Added: 10 , Zero Std: 0 , Max Cor: 0.927
#> 
 6 <R=0.927,r=0.914,N=   91>, Top: 11( 1 )[ 1 : 11 Fa= 103 : 0.914 ]( 10 , 10 , 98 ),<|>Tot Used: 246 , Added: 10 , Zero Std: 0 , Max Cor: 0.912
#> 
 7 <R=0.912,r=0.906,N=   91>, Top: 5( 1 )[ 1 : 5 Fa= 103 : 0.906 ]( 5 , 5 , 103 ),<|>Tot Used: 247 , Added: 5 , Zero Std: 0 , Max Cor: 0.906
#> 
 8 <R=0.906,r=0.853,N=   90>, Top: 42( 1 )[ 1 : 42 Fa= 135 : 0.853 ]( 40 , 44 , 103 ),<|>Tot Used: 313 , Added: 44 , Zero Std: 0 , Max Cor: 0.894
#> 
 9 <R=0.894,r=0.847,N=   90>, Top: 10( 1 )[ 1 : 10 Fa= 137 : 0.847 ]( 10 , 10 , 135 ),<|>Tot Used: 317 , Added: 10 , Zero Std: 0 , Max Cor: 0.845
#> 
 10 <R=0.845,r=0.823,N=   90>, Top: 18( 1 )[ 1 : 18 Fa= 142 : 0.823 ]( 16 , 16 , 137 ),<|>Tot Used: 330 , Added: 16 , Zero Std: 0 , Max Cor: 0.926
#> 
 11 <R=0.926,r=0.863,N=   90>, Top: 1( 1 )[ 1 : 1 Fa= 142 : 0.863 ]( 1 , 1 , 142 ),<|>Tot Used: 330 , Added: 1 , Zero Std: 0 , Max Cor: 0.822
#> 
 12 <R=0.822,r=0.800,N=   27>, Top: 14( 1 )[ 1 : 14 Fa= 149 : 0.800 ]( 13 , 13 , 142 ),<|>Tot Used: 343 , Added: 13 , Zero Std: 0 , Max Cor: 0.837
#> 
 13 <R=0.837,r=0.800,N=   27>, Top: 2( 1 )[ 1 : 2 Fa= 150 : 0.800 ]( 2 , 2 , 149 ),<|>Tot Used: 344 , Added: 2 , Zero Std: 0 , Max Cor: 0.797
#> 
 14 <R=0.797,r=0.800,N=    0>
#> 
 [ 14 ], 0.7971926 Decor Dimension: 344 Nused: 344 . Cor to Base: 184 , ABase: 89 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

692

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

122

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

4.57

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

4.59

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.7971926

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")

100 : mean_jerk_in_air6 200 : disp_index12 300 : mean_speed_in_air17 400 : gmrt_on_paper23




univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

100 : La_mean_jerk_in_air6 200 : La_disp_index12 300 : La_mean_speed_in_air17 400 : La_gmrt_on_paper23

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
total_time23 37.2 0.503 36.7 0.484 1.03e-05 0.863
total_time15 38.1 0.875 37.1 0.421 5.44e-01 0.844
air_time23 36.6 0.626 35.9 0.656 6.92e-03 0.844
air_time15 37.7 1.094 36.6 0.615 5.06e-01 0.829
total_time17 38.5 0.681 37.8 0.614 4.00e-03 0.824
paper_time23 36.4 0.439 36.0 0.231 6.72e-01 0.814
air_time17 37.9 0.914 37.0 0.795 3.52e-02 0.806
paper_time17 37.6 0.395 37.2 0.439 1.28e-03 0.796
total_time6 37.1 0.777 36.4 0.447 7.16e-01 0.790
air_time16 36.4 1.131 35.2 0.867 9.38e-01 0.787


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
air_time23 36.612 0.626 35.85799 0.656 6.92e-03 0.844
air_time15 37.720 1.094 36.60743 0.615 5.06e-01 0.829
air_time17 37.912 0.914 37.00002 0.795 3.52e-02 0.806
air_time16 36.357 1.131 35.24001 0.867 9.38e-01 0.787
disp_index23 16.116 0.194 15.92591 0.166 3.43e-01 0.787
air_time6 36.694 0.899 35.81144 0.665 7.39e-01 0.784
air_time7 36.742 0.758 36.09042 0.938 5.42e-04 0.779
La_paper_time3 0.284 0.391 -0.00243 0.190 1.12e-01 0.776
gmrt_in_air7 32.948 0.405 33.38157 0.396 9.99e-01 0.775
air_time2 36.256 1.176 35.08828 1.002 2.05e-01 0.773
La_mean_speed_on_paper2 -0.121 0.210 0.00476 0.155 6.60e-05 0.730
La_mean_speed_on_paper3 -0.125 0.239 0.03519 0.147 8.09e-05 0.727
La_mean_acc_on_paper3 0.262 0.387 -0.01629 0.297 4.33e-01 0.721
La_mean_speed_on_paper13 -5.051 0.068 -5.01659 0.107 2.18e-05 0.718

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.17 222 0.493


allSigvars <- names(dc)



dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
air_time23 36.612 0.626 35.85799 0.656 6.92e-03 0.844 0.844 1
air_time15 37.720 1.094 36.60743 0.615 5.06e-01 0.829 0.829 1
air_time17 37.912 0.914 37.00002 0.795 3.52e-02 0.806 0.806 1
air_time16 36.357 1.131 35.24001 0.867 9.38e-01 0.787 0.787 1
disp_index23 16.116 0.194 15.92591 0.166 3.43e-01 0.787 0.787 1
air_time6 36.694 0.899 35.81144 0.665 7.39e-01 0.784 0.784 1
air_time7 36.742 0.758 36.09042 0.938 5.42e-04 0.779 0.779 1
La_paper_time3 -0.954disp_index3 + 1.000paper_time3 -0.596*pressure_mean3 0.284 0.391 -0.00243 0.190 1.12e-01 0.776 0.715 -2
gmrt_in_air7 32.948 0.405 33.38157 0.396 9.99e-01 0.775 0.775 1
air_time2 36.256 1.176 35.08828 1.002 2.05e-01 0.773 0.773 1
La_mean_speed_on_paper2 -0.876gmrt_on_paper2 + 1.000mean_speed_on_paper2 -0.121 0.210 0.00476 0.155 6.60e-05 0.730 0.720 -1
La_mean_speed_on_paper3 -0.877gmrt_on_paper3 + 1.000mean_speed_on_paper3 + 0.000*pressure_mean3 -0.125 0.239 0.03519 0.147 8.09e-05 0.727 0.291 -2
La_mean_acc_on_paper3 + 1.000mean_acc_on_paper3 -0.718pressure_mean3 0.262 0.387 -0.01629 0.297 4.33e-01 0.721 0.691 0
mean_speed_on_paper2 NA 27.813 0.492 27.88786 3.091 5.50e-13 0.720 0.720 NA
La_mean_speed_on_paper13 -1.035gmrt_on_paper13 + 1.000mean_speed_on_paper13 -5.051 0.068 -5.01659 0.107 2.18e-05 0.718 0.626 -1
paper_time3 NA 36.536 0.657 35.35856 5.538 3.33e-16 0.715 0.715 NA
mean_acc_on_paper3 NA 25.438 0.310 24.65355 3.859 0.00e+00 0.691 0.691 NA
disp_index3 NA 16.086 0.639 15.59248 2.469 4.98e-13 0.669 0.669 4
mean_speed_on_paper13 NA 28.413 0.557 28.66694 0.421 9.93e-01 0.626 0.626 NA
gmrt_on_paper13 NA 32.328 0.536 32.54115 0.400 9.81e-01 0.606 0.606 2
gmrt_on_paper3 NA 32.296 0.519 31.78783 4.982 3.33e-16 0.358 0.358 NA
pressure_mean3 NA 35.043 0.300 34.33865 5.363 0.00e+00 0.341 0.341 NA
gmrt_on_paper2 NA 31.879 0.494 31.82043 3.524 2.06e-13 0.337 0.337 NA
mean_speed_on_paper3 NA 28.191 0.528 27.90558 4.378 6.66e-16 0.291 0.291 NA

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 78 7
1 7 82
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.5115 0.4347 0.588
    tp 0.5115 0.4347 0.588
    se 0.9213 0.8446 0.968
    sp 0.9176 0.8377 0.966
    diag.ac 0.9195 0.8687 0.955
    diag.or 130.5306 43.7750 389.223
    nndx 1.1919 1.0706 1.466
    youden 0.8390 0.6823 0.934
    pv.pos 0.9213 0.8446 0.968
    pv.neg 0.9176 0.8377 0.966
    lr.pos 11.1878 5.4882 22.807
    lr.neg 0.0857 0.0420 0.175
    p.rout 0.4885 0.4121 0.565
    p.rin 0.5115 0.4347 0.588
    p.tpdn 0.0824 0.0338 0.162
    p.tndp 0.0787 0.0322 0.155
    p.dntp 0.0787 0.0322 0.155
    p.dptn 0.0824 0.0338 0.162
  • tab:

      Outcome + Outcome - Total
    Test + 82 7 89
    Test - 7 78 85
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.920 0.869 0.955
3 se 0.921 0.845 0.968
4 sp 0.918 0.838 0.966
6 diag.or 130.531 43.775 389.223

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 80 5
1 10 79
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.4828 0.4065 0.560
    tp 0.5115 0.4347 0.588
    se 0.8876 0.8031 0.945
    sp 0.9412 0.8680 0.981
    diag.ac 0.9138 0.8618 0.951
    diag.or 126.4000 41.3399 386.478
    nndx 1.2065 1.0806 1.490
    youden 0.8288 0.6711 0.925
    pv.pos 0.9405 0.8665 0.980
    pv.neg 0.8889 0.8051 0.945
    lr.pos 15.0899 6.4267 35.431
    lr.neg 0.1194 0.0664 0.215
    p.rout 0.5172 0.4404 0.594
    p.rin 0.4828 0.4065 0.560
    p.tpdn 0.0588 0.0194 0.132
    p.tndp 0.1124 0.0552 0.197
    p.dntp 0.0595 0.0196 0.133
    p.dptn 0.1111 0.0546 0.195
  • tab:

      Outcome + Outcome - Total
    Test + 79 5 84
    Test - 10 80 90
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.914 0.862 0.951
3 se 0.888 0.803 0.945
4 sp 0.941 0.868 0.981
6 diag.or 126.400 41.340 386.478

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 70 15
1 11 78
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.534 0.4575 0.610
    tp 0.511 0.4347 0.588
    se 0.876 0.7896 0.937
    sp 0.824 0.7257 0.898
    diag.ac 0.851 0.7888 0.900
    diag.or 33.091 14.2531 76.826
    nndx 1.429 1.1985 1.941
    youden 0.700 0.5153 0.834
    pv.pos 0.839 0.7480 0.907
    pv.neg 0.864 0.7700 0.930
    lr.pos 4.966 3.1169 7.913
    lr.neg 0.150 0.0856 0.263
    p.rout 0.466 0.3897 0.543
    p.rin 0.534 0.4575 0.610
    p.tpdn 0.176 0.1023 0.274
    p.tndp 0.124 0.0633 0.210
    p.dntp 0.161 0.0932 0.252
    p.dptn 0.136 0.0698 0.230
  • tab:

      Outcome + Outcome - Total
    Test + 78 15 93
    Test - 11 70 81
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.851 0.789 0.900
3 se 0.876 0.790 0.937
4 sp 0.824 0.726 0.898
6 diag.or 33.091 14.253 76.826


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 69 16
1 3 86
  pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.5862 0.50921 0.6602
    tp 0.5115 0.43471 0.5879
    se 0.9663 0.90464 0.9930
    sp 0.8118 0.71239 0.8884
    diag.ac 0.8908 0.83474 0.9330
    diag.or 123.6250 34.60851 441.6006
    nndx 1.2853 1.13456 1.6207
    youden 0.7781 0.61703 0.8814
    pv.pos 0.8431 0.75780 0.9076
    pv.neg 0.9583 0.88303 0.9913
    lr.pos 5.1334 3.29564 7.9960
    lr.neg 0.0415 0.01359 0.1269
    p.rout 0.4138 0.33978 0.4908
    p.rin 0.5862 0.50921 0.6602
    p.tpdn 0.1882 0.11160 0.2876
    p.tndp 0.0337 0.00701 0.0954
    p.dntp 0.1569 0.09240 0.2422
    p.dptn 0.0417 0.00868 0.1170
  • tab:

      Outcome + Outcome - Total
    Test + 86 16 102
    Test - 3 69 72
    Total 89 85 174
  • method: exact

  • digits: 2

  • conf.level: 0.95

  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.891 0.835 0.933
3 se 0.966 0.905 0.993
4 sp 0.812 0.712 0.888
6 diag.or 123.625 34.609 441.601
  par(op)